/*
  author Sylvain Bertrand <sylvain.bertrand@gmail.com>
  Licensed under the GNU GPLv2
  Copyright 2012-2014
*/
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/fs.h>
#include <linux/irq.h>
#include <linux/cdev.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/ioctl.h>
#include <linux/dma-direction.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/poll.h>

#include <alga/rng_mng.h>
#include <uapi/alga/pixel_fmts.h>
#include <alga/timing.h>
#include <uapi/alga/amd/dce6/dce6.h>
#include <uapi/alga/amd/si/ioctl.h>

#include <uapi/alga/amd/si/pkt.h>

#include "../mc.h"
#include "../rlc.h"
#include "../ih.h"
#include "../fence.h"
#include "../ring.h"
#include "../dmas.h"
#include "../ba.h"
#include "../cps.h"
#include "../gpu.h"
#include "../drv.h"

#define FOPS_C
#include "fops.h"
#undef FOPS_C
#include "mmap.h"
#include "dma.h"
#include "dce.h"

#include "../regs.h"

#include "private.h"

dev_t devt_region;

/* All per-open-file private data, guarded by files_private_data_lock. */
LIST_HEAD(files_private_data);
static DEFINE_SPINLOCK(files_private_data_lock);

/*
 * One-time setup for this file's globals. LIST_HEAD() and
 * DEFINE_SPINLOCK() above already provide full static initialization
 * (the previous INIT_LIST_HEAD/spin_lock_init calls were redundant),
 * so nothing remains to do at run time. Kept as an exported function
 * because the driver core calls it — presumably at module init.
 */
void fops_init_once(void)
{
}

/*
 * Character-device open: allocate the per-file private data and link it
 * on the global files_private_data list.
 * Returns 0 or -ENOMEM.
 */
static int open(struct inode *i, struct file *f)
{
	struct file_private_data *data;
	struct dev_drv_data *dd;

	/* FIXME: should close file options/flags possibilities */

	dd = container_of(i->i_cdev, struct dev_drv_data, char_cdev);

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data) {
		dev_err(&dd->dev->dev, "no mem for file private data\n");
		return -ENOMEM;
	}

	/*
	 * Fully initialize the private data BEFORE publishing it on the
	 * global list: a concurrent walker of files_private_data must
	 * never observe a half-initialized entry.
	 */
	data->d = dd->dev;
	init_waitqueue_head(&data->evts_wq);
	INIT_LIST_HEAD(&data->evts);
	spin_lock_init(&data->evts_lock);

	spin_lock(&files_private_data_lock);
	list_add(&data->n, &files_private_data);
	spin_unlock(&files_private_data_lock);

	f->private_data = data;
	return 0;
}

/*
 * ioctl backend: allocate an aligned range of VRAM.
 * Reads sz/align from the user struct, writes the resulting gpu_addr
 * back on success.
 * Returns 0, -EFAULT on a faulting user pointer, or -ENODEV when the
 * VRAM range manager cannot satisfy the request.
 */
static long si_mem_alloc(struct pci_dev *dev, struct si_mem  __user *mem)
{
	struct dev_drv_data *dd;
	u64 sz;
	u64 align;
	u64 gpu_addr;
	long r;

	/* previously unchecked: a bad pointer would allocate garbage */
	if (get_user(sz, &mem->sz) || get_user(align, &mem->align))
		return -EFAULT;

	dd = pci_get_drvdata(dev);

	r = rng_alloc_align(&gpu_addr, &dd->vram.mng, sz, align);
	if (r != 0)
		return -ENODEV;

	if (put_user(gpu_addr, &mem->gpu_addr)) {
		/* user will never see the address: don't leak the range */
		rng_free(&dd->vram.mng, gpu_addr);
		return -EFAULT;
	}
	return 0;
}

/*
 * ioctl backend: free a VRAM range previously handed out by
 * si_mem_alloc(). The gpu address is read from user space; on a
 * faulting pointer there is nothing meaningful to free, so bail out
 * instead of freeing a garbage address.
 */
static void si_mem_free(struct pci_dev *dev, u64  __user *user_gpu_addr)
{
	u64 gpu_addr;
	struct dev_drv_data *dd;

	if (get_user(gpu_addr, user_gpu_addr))
		return;

	dd = pci_get_drvdata(dev);

	rng_free(&dd->vram.mng, gpu_addr);
}

/*
 * ioctl backend: dispatch a DMA request (copy or fill) to the
 * corresponding fops_dma_* handler.
 * Returns the handler's result, -EFAULT on a faulting user pointer, or
 * -ENODEV for an unknown operation type.
 */
static long si_dma(struct pci_dev *dev, struct si_dma __user *user_dma)
{
	struct si_dma dma;

	if (copy_from_user(&dma, user_dma, sizeof(dma)))
		return -EFAULT;

	switch (dma.type) {
	case SI_DMA_TYPE_L2L:
		return fops_dma_l2l(dev, &dma);
	case SI_DMA_TYPE_U32_FILL:
		return fops_dma_u32_fill(dev, &dma);
	default:
		dev_err(&dev->dev, "ioctl:dma:unknown operation type\n");
		return -ENODEV;
	} /* (stray ';' after this brace removed) */
}

/*
 * ioctl backend: translate a user-space (aperture) cpu address into the
 * matching gpu address and write it back into the same user union.
 * Returns 0 or -EFAULT (bad user pointer, or address not inside a
 * mapped aperture vma).
 */
static long si_cpu_addr_to_gpu_addr(struct pci_dev *dev,
				union si_cpu_addr_to_gpu_addr __user *arg)
{
	void __iomem *cpu_addr;
	struct vm_area_struct *vma;
	u64 gpu_addr;
	long r;

	/* previously unchecked get_user; also dropped an unused local
	   that fetched pci drvdata and never read it */
	if (get_user(cpu_addr, (void __iomem **)&arg->cpu_addr))
		return -EFAULT;

	r = cpu_addr_to_aperture_gpu_addr(dev, &gpu_addr, cpu_addr, &vma);
	if (r != 0)
		return -EFAULT;

	if (put_user(gpu_addr, &arg->gpu_addr))
		return -EFAULT;
	return 0;
}

/* dwords needed on the ring for one PKT3_IB packet (header + 3 body) */
#define GPU_3D_IB_DWS_N 4
/*
 * ioctl backend: queue execution of a user-prepared indirect buffer (IB)
 * on the 3d engine ring.
 * Returns 0, -EFAULT on a faulting user pointer, or SI_RING_TIMEOUT if
 * ring space did not free up within the caller-supplied retry budget.
 */
static long si_gpu_3d_ib(struct pci_dev *dev, struct si_gpu_3d_ib __user
								*user_gpu_3d_ib)
{
	struct si_gpu_3d_ib gpu_3d_ib;
	struct dev_drv_data *dd;
	long r;

	if (copy_from_user(&gpu_3d_ib, user_gpu_3d_ib, sizeof(gpu_3d_ib)))
		return -EFAULT;

	dd = pci_get_drvdata(dev);

	/* serialize all writers of the 3d ring */
	spin_lock(&dd->gpu_3d.lock);

	/* wait for GPU_3D_IB_DWS_N free dwords, bounded by user params */
	r = ring_wait(&dd->gpu_3d.ring, GPU_3D_IB_DWS_N,
						gpu_3d_ib.ring_t_info.n_max,
						gpu_3d_ib.ring_t_info.us);
	if (r == -RING_WAIT_TIMEOUT) {
		spin_unlock(&dd->gpu_3d.lock);
		return SI_RING_TIMEOUT;
	}

	/* PKT3_IB: gpu address (lo, hi) of the IB, then its dword count;
	   exact packet layout must not be reordered */
	gpu_3d_ring_wr(dev, PKT3(PKT3_IB, 3));
	gpu_3d_ring_wr(dev, lower_32_bits(gpu_3d_ib.gpu_addr));
	gpu_3d_ring_wr(dev, upper_32_bits(gpu_3d_ib.gpu_addr));
	gpu_3d_ring_wr(dev, gpu_3d_ib.dws_n);
	gpu_3d_ring_commit(dev);

	spin_unlock(&dd->gpu_3d.lock);
	return 0;
}
#undef GPU_3D_IB_DWS_N

/* dwords needed on the ring: PKT3_SURF_SYNC (6) + PKT3_EVENT_WR_EOP (8) */
#define GPU_3D_FENCE_DWS_N 14
/*
 * ioctl backend: emit a cache-flushing fence on the 3d ring and wait for
 * the GPU to signal it through the write-back buffer.
 * Returns 0, -EFAULT on a faulting user pointer, SI_RING_TIMEOUT when
 * ring space does not free up, or SI_FENCE_TIMEOUT when the fence is
 * not signaled within the user-supplied budget.
 */
static long si_gpu_3d_fence(struct pci_dev *dev,
			struct si_gpu_3d_fence __user *user_gpu_3d_fence)
{
	struct si_gpu_3d_fence gpu_3d_fence;
	struct dev_drv_data *dd;
	u64 wb_fence_gpu_addr;
	u32 fence_seq_n;
	long r;

	dd = pci_get_drvdata(dev);

	if (copy_from_user(&gpu_3d_fence, user_gpu_3d_fence,
							sizeof(gpu_3d_fence)))
		return -EFAULT;

	/* serialize all writers of the 3d ring */
	spin_lock(&dd->gpu_3d.lock);
	r = ring_wait(&dd->gpu_3d.ring, GPU_3D_FENCE_DWS_N,
						gpu_3d_fence.t_info.ring.n_max,
						gpu_3d_fence.t_info.ring.us);
	if (r == -RING_WAIT_TIMEOUT) {
		spin_unlock(&dd->gpu_3d.lock);
		return SI_RING_TIMEOUT;
	}

	/* gpu address the EOP event writes the fence value to */
	wb_fence_gpu_addr = dd->ba.wb_map->gpu_addr + WB_GPU_3D_RPTR_OF;
	fence_seq_n = fence_seq_n_get(&dd->gpu_3d.fence);


	/* flush some read caches (read/write caches are read cache too) */
	gpu_3d_ring_wr(dev, PKT3(PKT3_SURF_SYNC, 4));
	/* CP_COHER_CTL_0 */
	gpu_3d_ring_wr(dev, CCC_TCL1_ACTION_ENA | CCC_TCL2_ACTION_ENA
			| CCC_SH_KCACHE_ACTION_ENA | CCC_SH_ICACHE_ACTION_ENA);
	/* CP_COHER_SZ */
	gpu_3d_ring_wr(dev, 0xffffffff);
	/* CP_COHER_BASE */
	gpu_3d_ring_wr(dev, 0);
	/* polling interval, 0xa(10) * 16 clocks */
	gpu_3d_ring_wr(dev, 0x0000000a);

	/*
	 * EOP event with flushing of many write caches (read/write caches are
	 * write caches, see silicium_blks/caches)
	 */
	gpu_3d_ring_wr(dev, PKT3(PKT3_EVENT_WR_EOP, 5));
	gpu_3d_ring_wr(dev, set(PKT3_EVENT_IDX, 5) | set(VEI_EVENT_TYPE,
					VEI_CACHE_FLUSH_AND_INV_TS_EVENT));
	gpu_3d_ring_wr(dev, lower_32_bits(wb_fence_gpu_addr));
	/* DATA_SEL/INT_SEL share the dword with the address high bits */
	gpu_3d_ring_wr(dev, upper_32_bits(wb_fence_gpu_addr)
				| set(PKT3_DATA_SEL, 1) | set(PKT3_INT_SEL, 2));
	gpu_3d_ring_wr(dev, fence_seq_n);
	gpu_3d_ring_wr(dev, 0);
	gpu_3d_ring_commit(dev);
	spin_unlock(&dd->gpu_3d.lock);

	/* block (outside the ring lock) until the GPU writes the seq back */
	r = fence_wait(&dd->gpu_3d.fence, fence_seq_n,
		gpu_3d_fence.t_info.fence.n_max, gpu_3d_fence.t_info.fence.us);
	if (r == -FENCE_TIMEOUT)
		return SI_FENCE_TIMEOUT;
	return 0;
}
#undef GPU_3D_FENCE_DWS_N

/*
 * Main ioctl dispatcher.
 * SI_CONTEXT_LOST_ACK is handled before the context-lost gate so user
 * space can always re-arm a lost context; while context_lost is set,
 * every other command short-circuits to SI_CONTEXT_LOST.
 * NOTE(review): only _IOC_NR(cmd) is inspected — the ioctl magic/type
 * and direction bits are never validated; confirm this is intentional.
 */
static long unlocked_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	struct file_private_data *data;
	struct pci_dev *dev;
	long r;
	unsigned nr;

	data = f->private_data;

	nr = _IOC_NR(cmd);
	if (nr == SI_CONTEXT_LOST_ACK) {
		data->context_lost = 0;
		return 0;
	}

	if (data->context_lost)
		return SI_CONTEXT_LOST;

	dev = data->d;

	r = 0;
	switch (nr) {
	case SI_DCE_DP_SET:
		r = fops_dce_dp_set(dev, (void __user *)arg);
		break;
	case SI_DCE_DP_DPM:
		r = fops_dce_dp_dpm(dev, (u8 __user *)arg);
		break;
	case SI_DCE_PF:
		r = fops_dce_pf(dev, (u8 __user *)arg, data);
		break;
	case SI_DCE_EDID:
		r = fops_dce_edid(dev, (struct si_dce_edid __user *)arg);
		break;
	case SI_MEM_ALLOC:
		r = si_mem_alloc(dev, (struct si_mem __user *)arg);
		break;
	case SI_MEM_FREE:
		si_mem_free(dev, (u64  __user *)arg);
		break;
	case SI_CPU_ADDR_TO_GPU_ADDR:
		r = si_cpu_addr_to_gpu_addr(dev,
				(union si_cpu_addr_to_gpu_addr __user *)arg);
		break;
	case SI_GPU_3D_IB:
		r = si_gpu_3d_ib(dev, (struct si_gpu_3d_ib __user *)arg);
		break;
	case SI_GPU_3D_FENCE:
		r = si_gpu_3d_fence(dev, (struct si_gpu_3d_fence __user *)arg);
		break;
	case SI_DMA:
		r = si_dma(dev, (struct si_dma __user *)arg);
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

/*
 * Cancel outstanding page flips and free every still-queued event of
 * this file. Called from release().
 */
static void evts_cleanup(struct file_private_data *data)
{
	struct dev_drv_data *dd;
	struct fops_evt *fops_evt;
	struct fops_evt *tmp;

	dd = pci_get_drvdata(data->d);

	/* stop pending page flips so no further events get produced */
	dce6_pf_cancel_all(dd->dce);

	/*
	 * Drain under evts_lock: every other accessor of this list takes
	 * the lock with irqs disabled (events presumably arrive from irq
	 * context), so an unlocked walk could race with a late producer.
	 * kfree() is fine under a spinlock.
	 */
	spin_lock_irq(&data->evts_lock);
	list_for_each_entry_safe(fops_evt, tmp, &data->evts, n) {
		list_del(&fops_evt->n);
		kfree(fops_evt);
	}
	spin_unlock_irq(&data->evts_lock);
}

/*
 * Character-device release: unpublish the per-file private data from
 * the global list, drop any queued events, then free it.
 * Always returns 0.
 */
static int release(struct inode *i, struct file *f)
{
	struct file_private_data *data = f->private_data;

	/* make it unreachable for global-list walkers first */
	spin_lock(&files_private_data_lock);
	list_del(&data->n);
	spin_unlock(&files_private_data_lock);

	evts_cleanup(data);

	f->private_data = NULL;
	kfree(data);
	return 0;
}

/*
 * Must be called with the lock protecting the evts list held. Dequeues,
 * in order, as many events as fit in buf_sz into a freshly allocated
 * array ready to be sent to user space. Yields no events (NULL array,
 * count 0) when the list is empty or allocation fails.
 */
static void si_evts_dequeue(struct si_evt **evts_cpy, u64 *evts_n,
					struct list_head *fops_evts, u64 buf_sz)
{
	*evts_n = 0; /* use it as an index in the evt cpy array */
	*evts_cpy = NULL;

	if (list_empty(fops_evts))
		return;

	/* grow the copy array one event at a time while it still fits the
	   caller's buffer */
	while (((*evts_n + 1) * sizeof(**evts_cpy)) <= buf_sz) {
		struct fops_evt *fops_evt;
		struct si_evt *si_evt_from_list;
		struct si_evt *evts_cpy_new;

		/* must *not* sleep: caller holds a spinlock */
		evts_cpy_new = krealloc(*evts_cpy, (*evts_n + 1)
					* sizeof(**evts_cpy), GFP_ATOMIC);

		/* return whatever we achieve to copy */
		if (evts_cpy_new == NULL)
			break;

		*evts_cpy = evts_cpy_new;

		/* dequeue in order */
		fops_evt = list_first_entry(fops_evts, struct fops_evt, n);
		si_evt_from_list = &fops_evt->base;

		memcpy(&evts_cpy_new[*evts_n], si_evt_from_list,
						sizeof(*si_evt_from_list));
		list_del(&fops_evt->n);
		kfree(fops_evt);
		++(*evts_n);

		if (list_empty(fops_evts))
			break;
	}
}

/*
 * read(2): block (unless O_NONBLOCK) until at least one event is
 * queued, then copy as many events as fit in buf_sz to user space.
 * Returns the byte count copied (0 possible if atomic allocation
 * failed), -EINVAL for a non-zero offset, -EAGAIN, -ERESTARTSYS or
 * -EFAULT.
 */
static ssize_t fops_read(struct file *f, char __user *buf, size_t buf_sz,
								loff_t *of)
{
	struct file_private_data *data;
	long r;
	struct si_evt *si_evts_cpy;
	u64 si_evts_cpy_n;
	ssize_t si_evts_cpy_sz;

	data = f->private_data;

	/* the event stream has no notion of an offset */
	if (*of != 0)
		return -EINVAL;

	spin_lock_irq(&data->evts_lock);

	if (f->f_flags & O_NONBLOCK)
		if (list_empty(&data->evts)) {
			spin_unlock_irq(&data->evts_lock);
			return -EAGAIN;
		}

	/* drops evts_lock while sleeping, reacquires it before returning */
	r = wait_event_interruptible_lock_irq(data->evts_wq,
				!list_empty(&data->evts), data->evts_lock);
	if (r == -ERESTARTSYS) {
		spin_unlock_irq(&data->evts_lock);
		return (ssize_t)r;
	}

	/* runs under evts_lock, hence the GFP_ATOMIC inside */
	si_evts_dequeue(&si_evts_cpy, &si_evts_cpy_n, &data->evts, buf_sz);

	spin_unlock_irq(&data->evts_lock);

	si_evts_cpy_sz = si_evts_cpy_n * sizeof(*si_evts_cpy);

	if (si_evts_cpy_n) {
		/* don't forget that copy_to_user might sleep */
		r = copy_to_user(buf, si_evts_cpy, si_evts_cpy_sz);

		kfree(si_evts_cpy);

		if (r)
			return -EFAULT;
	}
	return si_evts_cpy_sz;
}

/*
 * poll(2)/select(2): report readability when at least one event is
 * queued on this file.
 */
static unsigned int fops_poll(struct file *f, struct poll_table_struct *wait)
{
	struct file_private_data *data = f->private_data;
	unsigned int mask = 0;

	poll_wait(f, &data->evts_wq, wait);

	spin_lock_irq(&data->evts_lock);
	if (!list_empty(&data->evts))
		mask = POLLIN | POLLRDNORM;
	spin_unlock_irq(&data->evts_lock);

	return mask;
}

/* character-device entry points; registered by the driver core code */
struct file_operations fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = unlocked_ioctl,
	.open = open,
	.release = release,
	.mmap = fops_mmap,
	.poll = fops_poll,
	.read = fops_read
};
